CelebA dataset (built manually)

With a TensorFlow dataset, you can inspect its contents by wrapping dataset.as_numpy_iterator() in list().
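For example, a minimal sketch with a throwaway toy dataset (the values are made up just to show the call):

import tensorflow as tf

# toy dataset used only to illustrate as_numpy_iterator()
ds = tf.data.Dataset.from_tensor_slices([1, 2, 3])
print(list(ds.as_numpy_iterator()))   # [1, 2, 3]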
import tensorflow as tf
import pathlib
import numpy as np
import tensorflow_datasets as tfds
from PIL import Image

TRAIN_NUM = 16000
VALID_NUM = 1000

tf.random.set_seed(1)

def count_items(ds):
    n = 0
    for _ in ds:
        n += 1
    return n

imgdir_path = pathlib.Path('/Users/csian/tensorflow_datasets/downloads/manual/img_align_celeba/')
file_list = sorted([str(path) for path in imgdir_path.glob('*.jpg')])

# parse the 'Male' attribute (0: female, 1: male) from list_attr_celeba.txt
fp = open('/Users/csian/tensorflow_datasets/downloads/manual/list_attr_celeba.txt', 'r')
line = fp.readline()
leng = int(line)                 # first line: total number of images
line = fp.readline()
index = line.split()             # second line: the attribute names
loc = index.index('Male')
sex = []
for i in range(leng):
    line = fp.readline()
    line = line[11:-1]           # drop the file name and the trailing newline
    line = line.split()          # split on whitespace runs (values are padded with 1 or 2 spaces)
    if line[loc] == '-1':
        li = 0
    else:
        li = 1
    sex.append(li)
fp.close()

labels = tf.convert_to_tensor(sex)
labels = labels[:TRAIN_NUM+VALID_NUM]

# load the first TRAIN_NUM+VALID_NUM images into memory as tensors
tensor_list = []
for i in range(TRAIN_NUM+VALID_NUM):
    image = Image.open(file_list[i])
    data = np.array(image)
    td = tf.convert_to_tensor(data)
    tensor_list.append(td)
    print('%d / %d' % (i+1, TRAIN_NUM+VALID_NUM))

ds = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds = ds.shuffle(TRAIN_NUM+VALID_NUM, reshuffle_each_iteration=False)
celeba_train = ds.take(TRAIN_NUM)
celeba_valid = ds.skip(TRAIN_NUM)
print('Training dataset: {}'.format(count_items(celeba_train)))
print('Validation dataset: {}'.format(count_items(celeba_valid)))

Training dataset: 16000

Validation dataset: 1000
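To sanity-check the split, a few samples can be inspected with the as_numpy_iterator() approach mentioned above (a minimal sketch; the raw aligned CelebA images should come out as 218x178x3 uint8 arrays):

# peek at the first three (image, label) pairs of the training split
for image, label in celeba_train.take(3).as_numpy_iterator():
    print(image.shape, image.dtype, int(label))   # e.g. (218, 178, 3) uint8 0 or 1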

Dataset preprocessing
import numpy as np

BATCH_SIZE = 32
BUFFER_SIZE = 1000
IMAGE_SIZE = (64, 64)
steps_per_epoch = np.ceil(TRAIN_NUM/BATCH_SIZE)

def preprocess(example, size=(64, 64), mode='train'):
    image = example[0]
    label = example[1]
    if mode == 'train':
        # training: random 178x178 crop, resize, random horizontal flip
        image_cropped = tf.image.random_crop(image, size=(178, 178, 3))
        image_resized = tf.image.resize(image_cropped, size=size)
        image_flip = tf.image.random_flip_left_right(image_resized)
        return image_flip/255.0, tf.cast(label, tf.int32)
    else:
        # evaluation: deterministic crop, resize only
        image_cropped = tf.image.crop_to_bounding_box(image, offset_height=20, offset_width=0, target_height=178, target_width=178)
        image_resized = tf.image.resize(image_cropped, size=size)
        return image_resized/255.0, tf.cast(label, tf.int32)

labels = []
tensor_list = []
for example in celeba_train:
    img, lab = preprocess(example, size=IMAGE_SIZE, mode='train')
    img = tf.convert_to_tensor(img)
    lab = tf.convert_to_tensor(lab)
    tensor_list.append(img)
    labels.append(lab)
ds_train = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds_train = ds_train.shuffle(buffer_size=BUFFER_SIZE).repeat()
ds_train = ds_train.batch(BATCH_SIZE)

labels = []
tensor_list = []
for example in celeba_valid:
    img, lab = preprocess(example, size=IMAGE_SIZE, mode='eval')
    img = tf.convert_to_tensor(img)
    lab = tf.convert_to_tensor(lab)
    tensor_list.append(img)
    labels.append(lab)
ds_valid = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds_valid = ds_valid.batch(BATCH_SIZE)
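Before training, it is worth checking that one batch comes out with the expected shape and value range (a minimal sketch using the ds_train built above):

# pull a single batch and verify its shape and scaling
batch_images, batch_labels = next(iter(ds_train))
print(batch_images.shape, batch_labels.shape)   # expected: (32, 64, 64, 3) (32,)
print(float(tf.reduce_min(batch_images)), float(tf.reduce_max(batch_images)))   # values should lie in [0, 1]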

>>> celeba_train
<TensorSliceDataset shapes: ((64, 64, 3), ()), types: (tf.float32, tf.int32)>
>>> len(celeba_train)
16000
>>> celeba_valid
<TensorSliceDataset shapes: ((64, 64, 3), ()), types: (tf.float32, tf.int32)>
>>> len(celeba_valid)
1000
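Since ds_train repeats indefinitely, training code has to pass steps_per_epoch so Keras knows where an epoch ends. The model below is only an illustrative placeholder (this section does not define one); a hedged sketch:

# hypothetical small CNN, only to show how ds_train / ds_valid / steps_per_epoch plug into fit()
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(64, 64, 3)),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(ds_train, validation_data=ds_valid,
                    epochs=1, steps_per_epoch=int(steps_per_epoch))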

Test dataset (batch size = 32)
import tensorflow as tf
import pathlib
import numpy as np
import tensorflow_datasets as tfds
from PIL import Image

TEST_NUM = 32
TRAIN_NUM = 16000      # must match the values used when building the train/valid splits
VALID_NUM = 1000
IMAGE_SIZE = (64, 64)

imgdir_path = pathlib.Path('/Users/csian/tensorflow_datasets/downloads/manual/img_align_celeba/')
file_list = sorted([str(path) for path in imgdir_path.glob('*.jpg')])

def preprocess(example, size=(64, 64), mode='train'):
    image = example[0]
    label = example[1]
    if mode == 'train':
        image_cropped = tf.image.random_crop(image, size=(178, 178, 3))
        image_resized = tf.image.resize(image_cropped, size=size)
        image_flip = tf.image.random_flip_left_right(image_resized)
        return image_flip/255.0, tf.cast(label, tf.int32)
    else:
        image_cropped = tf.image.crop_to_bounding_box(image, offset_height=20, offset_width=0, target_height=178, target_width=178)
        image_resized = tf.image.resize(image_cropped, size=size)
        return image_resized/255.0, tf.cast(label, tf.int32)

# parse the 'Male' attribute again for the test images
fp = open('/Users/csian/tensorflow_datasets/downloads/manual/list_attr_celeba.txt', 'r')
line = fp.readline()
leng = int(line)
line = fp.readline()
index = line.split()
loc = index.index('Male')
sex = []
for i in range(leng):
    line = fp.readline()
    line = line[11:-1]
    line = line.split()          # split on whitespace runs (values are padded with 1 or 2 spaces)
    if line[loc] == '-1':
        li = 0
    else:
        li = 1
    sex.append(li)
fp.close()

labels = tf.convert_to_tensor(sex)
labels = labels[TRAIN_NUM+VALID_NUM:TRAIN_NUM+VALID_NUM+TEST_NUM]

# load the TEST_NUM images that come right after the train/valid images
tensor_list = []
for k in range(TEST_NUM):
    i = TRAIN_NUM + VALID_NUM + k
    image = Image.open(file_list[i])
    data = np.array(image)
    td = tf.convert_to_tensor(data)
    tensor_list.append(td)
    print('%d / %d' % (k+1, TEST_NUM))

ds = tf.data.Dataset.from_tensor_slices((tensor_list, labels))

labels = []
tensor_list = []
for example in ds:
    # note: mode='train' applies the random crop/flip to the test images as well
    img, lab = preprocess(example, size=IMAGE_SIZE, mode='train')
    img = tf.convert_to_tensor(img)
    lab = tf.convert_to_tensor(lab)
    tensor_list.append(img)
    labels.append(lab)
ds_test = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds_test = ds_test.batch(32)
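ds_test holds a single batch of 32 preprocessed images, so it can be fed straight into evaluation or prediction once a trained model exists (the model calls below are hypothetical and left commented out); a quick shape check:

# images and labels of the one-and-only test batch
images, labels = next(iter(ds_test))
print(images.shape, labels.shape)   # expected: (32, 64, 64, 3) (32,)

# with a trained Keras classifier (hypothetical, not built in this section):
# results = model.evaluate(ds_test)
# preds = model.predict(ds_test)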